import random
import time
from urllib.parse import urljoin

import requests
from lxml import etree

# Table-of-contents page of the book; chapter links are resolved against it.
url = "https://www.shicimingju.com/book/sanguoyanyi.html"

# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
}

# Seconds to wait for the server before a request is abandoned; without a
# timeout a stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 10

# XPath of the chapter anchors on the table-of-contents page.
CHAPTER_ANCHOR_XPATH = '//div[@class="book-mulu"]/ul/li/a'

# XPath of every text node inside a chapter's content container.
CHAPTER_TEXT_XPATH = '//div[@class="card bookmark-list"]//text()'


def fetch_tree(session, page_url):
    """Download *page_url* and return it parsed as an lxml HTML tree.

    Args:
        session: A ``requests.Session`` used to issue the GET request.
        page_url: Absolute URL of the page to download.

    Returns:
        The root element produced by ``etree.HTML`` for the response text.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx response status.
    """
    response = session.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of parsing an error page as book content.
    response.raise_for_status()
    # Let requests guess the encoding from the body; this detection is
    # heuristic and may occasionally pick the wrong charset.
    response.encoding = response.apparent_encoding
    return etree.HTML(response.text)


def chapter_entries(anchors, base_url):
    """Yield ``(title, absolute_url)`` for each chapter anchor element.

    Title and link are read from the same element, so they cannot drift
    out of step the way two parallel lists can.

    Args:
        anchors: Iterable of ``<a>`` elements exposing ``get`` and
            ``itertext`` (lxml or ``xml.etree`` elements).
        base_url: URL the ``href`` values are resolved against.

    Yields:
        Tuples of the anchor's stripped text and its absolute URL. Anchors
        with a missing or empty ``href`` are skipped.
    """
    for anchor in anchors:
        href = (anchor.get("href") or "").strip()
        if not href:
            continue
        title = "".join(anchor.itertext()).strip()
        # The index page uses site-relative links such as
        # /book/sanguoyanyi/32.html; urljoin turns them into full URLs.
        yield title, urljoin(base_url, href)


def main():
    """Print the title and text nodes of every chapter listed at ``url``.

    Sleeps 1-3 seconds (chosen at random) after each chapter request.

    Raises:
        requests.RequestException: If the index page or any chapter page
            cannot be downloaded.
    """
    # One session reuses the underlying connection across all chapters.
    with requests.Session() as session:
        index_tree = fetch_tree(session, url)
        anchors = index_tree.xpath(CHAPTER_ANCHOR_XPATH)
        for title, chapter_url in chapter_entries(anchors, url):
            chapter_tree = fetch_tree(session, chapter_url)
            # Text nodes of the chapter body.
            con = chapter_tree.xpath(CHAPTER_TEXT_XPATH)
            print(title)
            print(con)
            time.sleep(random.randint(1, 3))


if __name__ == "__main__":
    main()
